1   package org.apache.lucene.util.automaton;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one or more
5    * contributor license agreements.  See the NOTICE file distributed with
6    * this work for additional information regarding copyright ownership.
7    * The ASF licenses this file to You under the Apache License, Version 2.0
8    * (the "License"); you may not use this file except in compliance with
9    * the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  //import java.io.IOException;
21  //import java.io.PrintWriter;
22  
23  import java.util.Arrays;
24  import java.util.BitSet;
25  import java.util.Collection;
26  import java.util.Collections;
27  import java.util.HashSet;
28  import java.util.Set;
29  
30  import org.apache.lucene.util.Accountable;
31  import org.apache.lucene.util.ArrayUtil;
32  import org.apache.lucene.util.InPlaceMergeSorter;
33  import org.apache.lucene.util.RamUsageEstimator;
34  import org.apache.lucene.util.Sorter;
35  
36  
37  
38  
39  // TODO
40  //   - could use packed int arrays instead
41  //   - could encode dest w/ delta from to?
42  
43  /** Represents an automaton and all its states and transitions.  States
44   *  are integers and must be created using {@link #createState}.  Mark a
45   *  state as an accept state using {@link #setAccept}.  Add transitions
46   *  using {@link #addTransition}.  Each state must have all of its
47   *  transitions added at once; if this is too restrictive then use
48   *  {@link Automaton.Builder} instead.  State 0 is always the
49   *  initial state.  Once a state is finished, either
50   *  because you've starting adding transitions to another state or you
51   *  call {@link #finishState}, then that states transitions are sorted
52   *  (first by min, then max, then dest) and reduced (transitions with
53   *  adjacent labels going to the same dest are combined).
54   *
55   * @lucene.experimental */
56  
57  public class Automaton implements Accountable {
58  
59    /** Where we next write to the int[] states; this increments by 2 for
60     *  each added state because we pack a pointer to the transitions
61     *  array and a count of how many transitions leave the state.  */
62    private int nextState;
63  
64    /** Where we next write to in int[] transitions; this
65     *  increments by 3 for each added transition because we
66     *  pack min, max, dest in sequence. */
67    private int nextTransition;
68  
69    /** Current state we are adding transitions to; the caller
70     *  must add all transitions for this state before moving
71     *  onto another state. */
72    private int curState = -1;
73  
74    /** Index in the transitions array, where this states
75     *  leaving transitions are stored, or -1 if this state
76     *  has not added any transitions yet, followed by number
77     *  of transitions. */
78    private int[] states;
79  
80    private final BitSet isAccept;
81    
82    /** Holds toState, min, max for each transition. */
83    private int[] transitions;
84  
85    /** True if no state has two transitions leaving with the same label. */
86    private boolean deterministic = true;
87  
88    /** Sole constructor; creates an automaton with no states. */
89    public Automaton() {
90       this(2, 2);
91    }
92  
93    /**
94     * Constructor which creates an automaton with enough space for the given
95     * number of states and transitions.
96     * 
97     * @param numStates
98     *           Number of states.
99     * @param numTransitions
100    *           Number of transitions.
101    */
102   public Automaton(int numStates, int numTransitions) {
103      states = new int[numStates * 2];
104      isAccept = new BitSet(numStates);
105      transitions = new int[numTransitions * 3];
106   }
107 
108   /** Create a new state. */
109   public int createState() {
110     growStates();
111     int state = nextState/2;
112     states[nextState] = -1;
113     nextState += 2;
114     return state;
115   }
116 
117   /** Set or clear this state as an accept state. */
118   public void setAccept(int state, boolean accept) {
119     if (state >= getNumStates()) {
120       throw new IllegalArgumentException("state=" + state + " is out of bounds (numStates=" + getNumStates() + ")");
121     }
122     if (accept) {
123       isAccept.set(state);
124     } else {
125       isAccept.clear(state);
126     }
127   }
128 
129   /** Sugar to get all transitions for all states.  This is
130    *  object-heavy; it's better to iterate state by state instead. */
131   public Transition[][] getSortedTransitions() {
132     int numStates = getNumStates();
133     Transition[][] transitions = new Transition[numStates][];
134     for(int s=0;s<numStates;s++) {
135       int numTransitions = getNumTransitions(s);
136       transitions[s] = new Transition[numTransitions];
137       for(int t=0;t<numTransitions;t++) {
138         Transition transition = new Transition();
139         getTransition(s, t, transition);
140         transitions[s][t] = transition;
141       }
142     }
143 
144     return transitions;
145   }
146 
147   /** Returns accept states.  If the bit is set then that state is an accept state. */
148   BitSet getAcceptStates() {
149     return isAccept;
150   }
151 
152   /** Returns true if this state is an accept state. */
153   public boolean isAccept(int state) {
154     return isAccept.get(state);
155   }
156 
157   /** Add a new transition with min = max = label. */
158   public void addTransition(int source, int dest, int label) {
159     addTransition(source, dest, label, label);
160   }
161 
162   /** Add a new transition with the specified source, dest, min, max. */
163   public void addTransition(int source, int dest, int min, int max) {
164     assert nextTransition%3 == 0;
165 
166     if (source >= nextState/2) {
167       throw new IllegalArgumentException("source=" + source + " is out of bounds (maxState is " + (nextState/2-1) + ")");
168     }
169     if (dest >= nextState/2) {
170       throw new IllegalArgumentException("dest=" + dest + " is out of bounds (max state is " + (nextState/2-1) + ")");
171     }
172 
173     growTransitions();
174     if (curState != source) {
175       if (curState != -1) {
176         finishCurrentState();
177       }
178 
179       // Move to next source:
180       curState = source;
181       if (states[2*curState] != -1) {
182         throw new IllegalStateException("from state (" + source + ") already had transitions added");
183       }
184       assert states[2*curState+1] == 0;
185       states[2*curState] = nextTransition;
186     }
187 
188     transitions[nextTransition++] = dest;
189     transitions[nextTransition++] = min;
190     transitions[nextTransition++] = max;
191 
192     // Increment transition count for this state
193     states[2*curState+1]++;
194   }
195 
196   /** Add a [virtual] epsilon transition between source and dest.
197    *  Dest state must already have all transitions added because this
198    *  method simply copies those same transitions over to source. */
199   public void addEpsilon(int source, int dest) {
200     Transition t = new Transition();
201     int count = initTransition(dest, t);
202     for(int i=0;i<count;i++) {
203       getNextTransition(t);
204       addTransition(source, t.dest, t.min, t.max);
205     }
206     if (isAccept(dest)) {
207       setAccept(source, true);
208     }
209   }
210 
211   /** Copies over all states/transitions from other.  The states numbers
212    *  are sequentially assigned (appended). */
213   public void copy(Automaton other) {
214 
215     // Bulk copy and then fixup the state pointers:
216     int stateOffset = getNumStates();
217     states = ArrayUtil.grow(states, nextState + other.nextState);
218     System.arraycopy(other.states, 0, states, nextState, other.nextState);
219     for(int i=0;i<other.nextState;i += 2) {
220       if (states[nextState+i] != -1) {
221         states[nextState+i] += nextTransition;
222       }
223     }
224     nextState += other.nextState;
225     int otherNumStates = other.getNumStates();
226     BitSet otherAcceptStates = other.getAcceptStates();
227     int state = 0;
228     while (state < otherNumStates && (state = otherAcceptStates.nextSetBit(state)) != -1) {
229       setAccept(stateOffset + state, true);
230       state++;
231     }
232 
233     // Bulk copy and then fixup dest for each transition:
234     transitions = ArrayUtil.grow(transitions, nextTransition + other.nextTransition);
235     System.arraycopy(other.transitions, 0, transitions, nextTransition, other.nextTransition);
236     for(int i=0;i<other.nextTransition;i += 3) {
237       transitions[nextTransition+i] += stateOffset;
238     }
239     nextTransition += other.nextTransition;
240 
241     if (other.deterministic == false) {
242       deterministic = false;
243     }
244   }
245 
246   /** Freezes the last state, sorting and reducing the transitions. */
247   private void finishCurrentState() {
248     int numTransitions = states[2*curState+1];
249     assert numTransitions > 0;
250 
251     int offset = states[2*curState];
252     int start = offset/3;
253     destMinMaxSorter.sort(start, start+numTransitions);
254 
255     // Reduce any "adjacent" transitions:
256     int upto = 0;
257     int min = -1;
258     int max = -1;
259     int dest = -1;
260 
261     for(int i=0;i<numTransitions;i++) {
262       int tDest = transitions[offset+3*i];
263       int tMin = transitions[offset+3*i+1];
264       int tMax = transitions[offset+3*i+2];
265 
266       if (dest == tDest) {
267         if (tMin <= max+1) {
268           if (tMax > max) {
269             max = tMax;
270           }
271         } else {
272           if (dest != -1) {
273             transitions[offset+3*upto] = dest;
274             transitions[offset+3*upto+1] = min;
275             transitions[offset+3*upto+2] = max;
276             upto++;
277           }
278           min = tMin;
279           max = tMax;
280         }
281       } else {
282         if (dest != -1) {
283           transitions[offset+3*upto] = dest;
284           transitions[offset+3*upto+1] = min;
285           transitions[offset+3*upto+2] = max;
286           upto++;
287         }
288         dest = tDest;
289         min = tMin;
290         max = tMax;
291       }
292     }
293 
294     if (dest != -1) {
295       // Last transition
296       transitions[offset+3*upto] = dest;
297       transitions[offset+3*upto+1] = min;
298       transitions[offset+3*upto+2] = max;
299       upto++;
300     }
301 
302     nextTransition -= (numTransitions-upto)*3;
303     states[2*curState+1] = upto;
304 
305     // Sort transitions by min/max/dest:
306     minMaxDestSorter.sort(start, start+upto);
307 
308     if (deterministic && upto > 1) {
309       int lastMax = transitions[offset+2];
310       for(int i=1;i<upto;i++) {
311         min = transitions[offset + 3*i + 1];
312         if (min <= lastMax) {
313           deterministic = false;
314           break;
315         }
316         lastMax = transitions[offset + 3*i + 2];
317       }
318     }
319   }
320 
321   /** Returns true if this automaton is deterministic (for ever state
322    *  there is only one transition for each label). */
323   public boolean isDeterministic() {
324     return deterministic;
325   }
326 
327   /** Finishes the current state; call this once you are done adding
328    *  transitions for a state.  This is automatically called if you
329    *  start adding transitions to a new source state, but for the last
330    *  state you add you need to this method yourself. */
331   public void finishState() {
332     if (curState != -1) {
333       finishCurrentState();
334       curState = -1;
335     }
336   }
337 
338   // TODO: add finish() to shrink wrap the arrays?
339 
340   /** How many states this automaton has. */
341   public int getNumStates() {
342     return nextState/2;
343   }
344 
345   /** How many transitions this automaton has. */
346   public int getNumTransitions() {
347     return nextTransition / 3;   
348   }
349   
350   /** How many transitions this state has. */
351   public int getNumTransitions(int state) {
352     assert state >= 0;
353     int count = states[2*state+1];
354     if (count == -1) {
355       return 0;
356     } else {
357       return count;
358     }
359   }
360 
361   private void growStates() {
362     if (nextState+2 >= states.length) {
363       states = ArrayUtil.grow(states, nextState+2);
364     }
365   }
366 
367   private void growTransitions() {
368     if (nextTransition+3 >= transitions.length) {
369       transitions = ArrayUtil.grow(transitions, nextTransition+3);
370     }
371   }
372 
373   /** Sorts transitions by dest, ascending, then min label ascending, then max label ascending */
374   private final Sorter destMinMaxSorter = new InPlaceMergeSorter() {
375 
376       private void swapOne(int i, int j) {
377         int x = transitions[i];
378         transitions[i] = transitions[j];
379         transitions[j] = x;
380       }
381 
382       @Override
383       protected void swap(int i, int j) {
384         int iStart = 3*i;
385         int jStart = 3*j;
386         swapOne(iStart, jStart);
387         swapOne(iStart+1, jStart+1);
388         swapOne(iStart+2, jStart+2);
389       };
390 
391       @Override
392       protected int compare(int i, int j) {
393         int iStart = 3*i;
394         int jStart = 3*j;
395 
396         // First dest:
397         int iDest = transitions[iStart];
398         int jDest = transitions[jStart];
399         if (iDest < jDest) {
400           return -1;
401         } else if (iDest > jDest) {
402           return 1;
403         }
404 
405         // Then min:
406         int iMin = transitions[iStart+1];
407         int jMin = transitions[jStart+1];
408         if (iMin < jMin) {
409           return -1;
410         } else if (iMin > jMin) {
411           return 1;
412         }
413 
414         // Then max:
415         int iMax = transitions[iStart+2];
416         int jMax = transitions[jStart+2];
417         if (iMax < jMax) {
418           return -1;
419         } else if (iMax > jMax) {
420           return 1;
421         }
422 
423         return 0;
424       }
425     };
426 
427   /** Sorts transitions by min label, ascending, then max label ascending, then dest ascending */
428   private final Sorter minMaxDestSorter = new InPlaceMergeSorter() {
429 
430       private void swapOne(int i, int j) {
431         int x = transitions[i];
432         transitions[i] = transitions[j];
433         transitions[j] = x;
434       }
435 
436       @Override
437       protected void swap(int i, int j) {
438         int iStart = 3*i;
439         int jStart = 3*j;
440         swapOne(iStart, jStart);
441         swapOne(iStart+1, jStart+1);
442         swapOne(iStart+2, jStart+2);
443       };
444 
445       @Override
446       protected int compare(int i, int j) {
447         int iStart = 3*i;
448         int jStart = 3*j;
449 
450         // First min:
451         int iMin = transitions[iStart+1];
452         int jMin = transitions[jStart+1];
453         if (iMin < jMin) {
454           return -1;
455         } else if (iMin > jMin) {
456           return 1;
457         }
458 
459         // Then max:
460         int iMax = transitions[iStart+2];
461         int jMax = transitions[jStart+2];
462         if (iMax < jMax) {
463           return -1;
464         } else if (iMax > jMax) {
465           return 1;
466         }
467 
468         // Then dest:
469         int iDest = transitions[iStart];
470         int jDest = transitions[jStart];
471         if (iDest < jDest) {
472           return -1;
473         } else if (iDest > jDest) {
474           return 1;
475         }
476 
477         return 0;
478       }
479     };
480 
481   /** Initialize the provided Transition to iterate through all transitions
482    *  leaving the specified state.  You must call {@link #getNextTransition} to
483    *  get each transition.  Returns the number of transitions
484    *  leaving this state. */
485   public int initTransition(int state, Transition t) {
486     assert state < nextState/2: "state=" + state + " nextState=" + nextState;
487     t.source = state;
488     t.transitionUpto = states[2*state];
489     return getNumTransitions(state);
490   }
491 
492   /** Iterate to the next transition after the provided one */
493   public void getNextTransition(Transition t) {
494     // Make sure there is still a transition left:
495     assert (t.transitionUpto+3 - states[2*t.source]) <= 3*states[2*t.source+1];
496 
497     // Make sure transitions are in fact sorted:
498     assert transitionSorted(t);
499 
500     t.dest = transitions[t.transitionUpto++];
501     t.min = transitions[t.transitionUpto++];
502     t.max = transitions[t.transitionUpto++];
503   }
504 
505   private boolean transitionSorted(Transition t) {
506 
507     int upto = t.transitionUpto;
508     if (upto == states[2*t.source]) {
509       // Transition isn't initialzed yet (this is the first transition); don't check:
510       return true;
511     }
512 
513     int nextDest = transitions[upto];
514     int nextMin = transitions[upto+1];
515     int nextMax = transitions[upto+2];
516     if (nextMin > t.min) {
517       return true;
518     } else if (nextMin < t.min) {
519       return false;
520     }
521 
522     // Min is equal, now test max:
523     if (nextMax > t.max) {
524       return true;
525     } else if (nextMax < t.max) {
526       return false;
527     }
528 
529     // Max is also equal, now test dest:
530     if (nextDest > t.dest) {
531       return true;
532     } else if (nextDest < t.dest) {
533       return false;
534     }
535 
536     // We should never see fully equal transitions here:
537     return false;
538   }
539 
540   /** Fill the provided {@link Transition} with the index'th
541    *  transition leaving the specified state. */
542   public void getTransition(int state, int index, Transition t) {
543     int i = states[2*state] + 3*index;
544     t.source = state;
545     t.dest = transitions[i++];
546     t.min = transitions[i++];
547     t.max = transitions[i++];
548   }
549 
550   static void appendCharString(int c, StringBuilder b) {
551     if (c >= 0x21 && c <= 0x7e && c != '\\' && c != '"') b.appendCodePoint(c);
552     else {
553       b.append("\\\\U");
554       String s = Integer.toHexString(c);
555       if (c < 0x10) b.append("0000000").append(s);
556       else if (c < 0x100) b.append("000000").append(s);
557       else if (c < 0x1000) b.append("00000").append(s);
558       else if (c < 0x10000) b.append("0000").append(s);
559       else if (c < 0x100000) b.append("000").append(s);
560       else if (c < 0x1000000) b.append("00").append(s);
561       else if (c < 0x10000000) b.append("0").append(s);
562       else b.append(s);
563     }
564   }
565 
566   /*
567   public void writeDot(String fileName) {
568     if (fileName.indexOf('/') == -1) {
569       fileName = "/l/la/lucene/core/" + fileName + ".dot";
570     }
571     try {
572       PrintWriter pw = new PrintWriter(fileName);
573       pw.println(toDot());
574       pw.close();
575     } catch (IOException ioe) {
576       throw new RuntimeException(ioe);
577     }
578   }
579   */
580 
581   /** Returns the dot (graphviz) representation of this automaton.
582    *  This is extremely useful for visualizing the automaton. */
583   public String toDot() {
584     // TODO: breadth first search so we can see get layered output...
585 
586     StringBuilder b = new StringBuilder();
587     b.append("digraph Automaton {\n");
588     b.append("  rankdir = LR\n");
589     final int numStates = getNumStates();
590     if (numStates > 0) {
591       b.append("  initial [shape=plaintext,label=\"0\"]\n");
592       b.append("  initial -> 0\n");
593     }
594 
595     Transition t = new Transition();
596 
597     for(int state=0;state<numStates;state++) {
598       b.append("  ");
599       b.append(state);
600       if (isAccept(state)) {
601         b.append(" [shape=doublecircle,label=\"" + state + "\"]\n");
602       } else {
603         b.append(" [shape=circle,label=\"" + state + "\"]\n");
604       }
605       int numTransitions = initTransition(state, t);
606       //System.out.println("toDot: state " + state + " has " + numTransitions + " transitions; t.nextTrans=" + t.transitionUpto);
607       for(int i=0;i<numTransitions;i++) {
608         getNextTransition(t);
609         //System.out.println("  t.nextTrans=" + t.transitionUpto + " t=" + t);
610         assert t.max >= t.min;
611         b.append("  ");
612         b.append(state);
613         b.append(" -> ");
614         b.append(t.dest);
615         b.append(" [label=\"");
616         appendCharString(t.min, b);
617         if (t.max != t.min) {
618           b.append('-');
619           appendCharString(t.max, b);
620         }
621         b.append("\"]\n");
622         //System.out.println("  t=" + t);
623       }
624     }
625     b.append('}');
626     return b.toString();
627   }
628 
629   /**
630    * Returns sorted array of all interval start points.
631    */
632   int[] getStartPoints() {
633     Set<Integer> pointset = new HashSet<>();
634     pointset.add(Character.MIN_CODE_POINT);
635     //System.out.println("getStartPoints");
636     for (int s=0;s<nextState;s+=2) {
637       int trans = states[s];
638       int limit = trans+3*states[s+1];
639       //System.out.println("  state=" + (s/2) + " trans=" + trans + " limit=" + limit);
640       while (trans < limit) {
641         int min = transitions[trans+1];
642         int max = transitions[trans+2];
643         //System.out.println("    min=" + min);
644         pointset.add(min);
645         if (max < Character.MAX_CODE_POINT) {
646           pointset.add(max + 1);
647         }
648         trans += 3;
649       }
650     }
651     int[] points = new int[pointset.size()];
652     int n = 0;
653     for (Integer m : pointset) {
654       points[n++] = m;
655     }
656     Arrays.sort(points);
657     return points;
658   }
659 
660   /**
661    * Performs lookup in transitions, assuming determinism.
662    * 
663    * @param state starting state
664    * @param label codepoint to look up
665    * @return destination state, -1 if no matching outgoing transition
666    */
667   public int step(int state, int label) {
668     assert state >= 0;
669     assert label >= 0;
670     int trans = states[2*state];
671     int limit = trans + 3*states[2*state+1];
672     // TODO: we could do bin search; transitions are sorted
673     while (trans < limit) {
674       int dest = transitions[trans];
675       int min = transitions[trans+1];
676       int max = transitions[trans+2];
677       if (min <= label && label <= max) {
678         return dest;
679       }
680       trans += 3;
681     }
682 
683     return -1;
684   }
685 
686   /** Records new states and transitions and then {@link
687    *  #finish} creates the {@link Automaton}.  Use this
688    *  when you cannot create the Automaton directly because
689    *  it's too restrictive to have to add all transitions
690    *  leaving each state at once. */
691   public static class Builder {
692     private int nextState = 0;
693     private final BitSet isAccept;
694     private int[] transitions;
695     private int nextTransition = 0;
696 
697     /** Default constructor, pre-allocating for 16 states and transitions. */
698     public Builder() {
699        this(16, 16);
700     }
701 
702     /**
703      * Constructor which creates a builder with enough space for the given
704      * number of states and transitions.
705      * 
706      * @param numStates
707      *           Number of states.
708      * @param numTransitions
709      *           Number of transitions.
710      */
711     public Builder(int numStates, int numTransitions) {
712        isAccept = new BitSet(numStates);
713        transitions = new int[numTransitions * 4];
714     }
715 
716     /** Add a new transition with min = max = label. */
717     public void addTransition(int source, int dest, int label) {
718       addTransition(source, dest, label, label);
719     }
720 
721     /** Add a new transition with the specified source, dest, min, max. */
722     public void addTransition(int source, int dest, int min, int max) {
723       if (transitions.length < nextTransition+4) {
724         transitions = ArrayUtil.grow(transitions, nextTransition+4);
725       }
726       transitions[nextTransition++] = source;
727       transitions[nextTransition++] = dest;
728       transitions[nextTransition++] = min;
729       transitions[nextTransition++] = max;
730     }
731 
732     /** Add a [virtual] epsilon transition between source and dest.
733      *  Dest state must already have all transitions added because this
734      *  method simply copies those same transitions over to source. */
735     public void addEpsilon(int source, int dest) {
736       for (int upto = 0; upto < nextTransition; upto += 4) {
737          if (transitions[upto] == dest) {
738             addTransition(source, transitions[upto + 1], transitions[upto + 2], transitions[upto + 3]);
739          }
740       }
741       if (isAccept(dest)) {
742         setAccept(source, true);
743       }
744     }
745 
746     /** Sorts transitions first then min label ascending, then
747      *  max label ascending, then dest ascending */
748     private final Sorter sorter = new InPlaceMergeSorter() {
749 
750         private void swapOne(int i, int j) {
751           int x = transitions[i];
752           transitions[i] = transitions[j];
753           transitions[j] = x;
754         }
755 
756         @Override
757         protected void swap(int i, int j) {
758           int iStart = 4*i;
759           int jStart = 4*j;
760           swapOne(iStart, jStart);
761           swapOne(iStart+1, jStart+1);
762           swapOne(iStart+2, jStart+2);
763           swapOne(iStart+3, jStart+3);
764         };
765 
766         @Override
767         protected int compare(int i, int j) {
768           int iStart = 4*i;
769           int jStart = 4*j;
770 
771           // First src:
772           int iSrc = transitions[iStart];
773           int jSrc = transitions[jStart];
774           if (iSrc < jSrc) {
775             return -1;
776           } else if (iSrc > jSrc) {
777             return 1;
778           }
779 
780           // Then min:
781           int iMin = transitions[iStart+2];
782           int jMin = transitions[jStart+2];
783           if (iMin < jMin) {
784             return -1;
785           } else if (iMin > jMin) {
786             return 1;
787           }
788 
789           // Then max:
790           int iMax = transitions[iStart+3];
791           int jMax = transitions[jStart+3];
792           if (iMax < jMax) {
793             return -1;
794           } else if (iMax > jMax) {
795             return 1;
796           }
797 
798           // First dest:
799           int iDest = transitions[iStart+1];
800           int jDest = transitions[jStart+1];
801           if (iDest < jDest) {
802             return -1;
803           } else if (iDest > jDest) {
804             return 1;
805           }
806 
807           return 0;
808         }
809       };
810 
811     /** Compiles all added states and transitions into a new {@code Automaton}
812      *  and returns it. */
813     public Automaton finish() {
814       // Create automaton with the correct size.
815       int numStates = nextState;
816       int numTransitions = nextTransition / 4;
817       Automaton a = new Automaton(numStates, numTransitions);
818       
819       // Create all states.
820       for (int state = 0; state < numStates; state++) {
821          a.createState();
822          a.setAccept(state, isAccept(state));
823       }
824       
825       // Create all transitions
826       sorter.sort(0, numTransitions);
827       for (int upto = 0; upto < nextTransition; upto += 4) {
828         a.addTransition(transitions[upto],
829                         transitions[upto+1],
830                         transitions[upto+2],
831                         transitions[upto+3]);
832       }
833 
834       a.finishState();
835       
836       return a;
837     }
838 
839     /** Create a new state. */
840     public int createState() {
841       return nextState++;
842     }
843 
844     /** Set or clear this state as an accept state. */
845     public void setAccept(int state, boolean accept) {
846       if (state >= getNumStates()) {
847         throw new IllegalArgumentException("state=" + state + " is out of bounds (numStates=" + getNumStates() + ")");
848       }
849       
850       this.isAccept.set(state, accept);
851     }
852 
853     /** Returns true if this state is an accept state. */
854     public boolean isAccept(int state) {
855       return this.isAccept.get(state);
856     }
857 
858     /** How many states this automaton has. */
859     public int getNumStates() {
860       return nextState;
861     }
862 
863     /** Copies over all states/transitions from other. */
864     public void copy(Automaton other) {
865       int offset = getNumStates();
866       int otherNumStates = other.getNumStates();
867 
868       // Copy all states
869       copyStates(other);
870       
871       // Copy all transitions
872       Transition t = new Transition();
873       for(int s=0;s<otherNumStates;s++) {
874         int count = other.initTransition(s, t);
875         for(int i=0;i<count;i++) {
876           other.getNextTransition(t);
877           addTransition(offset + s, offset + t.dest, t.min, t.max);
878         }
879       }
880     }
881 
882     /** Copies over all states from other. */
883     public void copyStates(Automaton other) {
884       int otherNumStates = other.getNumStates();
885       for (int s = 0; s < otherNumStates; s++) {
886         int newState = createState();
887         setAccept(newState, other.isAccept(s));
888       }
889     }
890   }
891 
892   @Override
893   public long ramBytesUsed() {
894     // TODO: BitSet RAM usage (isAccept.size()/8) isn't fully accurate...
895     return RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.sizeOf(states) + RamUsageEstimator.sizeOf(transitions) +
896       RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + (isAccept.size() / 8) + RamUsageEstimator.NUM_BYTES_OBJECT_REF +
897       2 * RamUsageEstimator.NUM_BYTES_OBJECT_REF +
898       3 * RamUsageEstimator.NUM_BYTES_INT +
899       RamUsageEstimator.NUM_BYTES_BOOLEAN;
900   }
901 
902   @Override
903   public Collection<Accountable> getChildResources() {
904     return Collections.emptyList();
905   }
906 }